*** How worried should we be? The implications of fabricated survey data for political science
*** Table 3: Item-Level Effects of Fabricated Data (Random, Speeding, and Middling Responses)
*** The values generated by this do file also correspond to Table A10 (unabridged version of Table 3 in the main text)

* Session setup: turn off output paging so the do-file runs without pauses
set more off

* Working directory: edit the path on the next line to the folder holding the datasets
cd "C:\~\Downloads\"

** Random Responses

use "VEN_fraud_data.dta", clear

* env2b and drk1 are each stored as a pair of variables. Take the value from the
* member of the pair whose partner is coded 999999; if neither member is coded
* 999999 the combined variable is left missing.
gen env2b = cond(env2b1 == 999999, env2b2, cond(env2b2 == 999999, env2b1, .))
gen drk1 = cond(drk11 == 999999, drk12, cond(drk12 == 999999, drk11, .))

* fixed seed so the simulated answers are reproducible
set seed 2414830

* survey items analysed in this scenario
local items l1 venl2 venl3 b1 b2 b3 b4 b6 b43 b11 b12 b13 b18 b21 b21a b32 b37 b47a venb11 venb10 venb51 venvb18 venvb19 venvb20 pr3dn pr3en polz1 ///
ros1 ros4 ing4 eff1 eff2 vengrp15 vengrp16 aoj22new media3 media4 media4b media1 media2 media2b dst1b env1c e5 e15 e3 e16 ///
d1 d2 d3 d4 d5 d6 vengrp5 vengrp6 vengrp7 vengrp8 vengrp9 vengrp10 vengrp11 vengrp12 vengrp13 vengrp14 venps1 venps2 venps3 venct1 venct2 venct3 ///
ls3 soct2 idio2 cp6 cp7 cp8 cp13 cp20 it1 aoj11 aoj12 polz1a m1 m2 drk1 env2b pn4 pn5 lib1 lib2b lib2c ///
lib4 fear11 pol1 vengrp1 vengrp2 vengrp3 vengrp4 mil10a1 mil10e1 mil10un1 mil10un2 mil10a2 mil10e2 venesc2b venesc3 ///
sd2new2 sd3new2 sd6new2 venprot10 venprot12 venprot11 mil10oas1 mil10oas2

* Overwrite the matched, likely fraudulent cases with uniform random integers
* drawn between 1 and the largest observed value of each item
foreach item of varlist `items' {
	* codes 888888, 988888, 999999 and 99 become missing before the maximum is taken
	recode `item' (888888 988888 999999 99 = .)
	quietly summarize `item'
	replace `item' =  runiformint(1, r(max)) if cem_matched == 1 & likelyfraud == 1
}
*

* Item-by-item comparisons for the random-response scenario.
* Three result files are written, one row per survey item, keyed by the item name.
local items l1 venl2 venl3 b1 b2 b3 b4 b6 b43 b11 b12 b13 b18 b21 b21a b32 b37 b47a venb11 venb10 venb51 venvb18 venvb19 venvb20 pr3dn pr3en polz1 ///
ros1 ros4 ing4 eff1 eff2 vengrp15 vengrp16 aoj22new media3 media4 media4b media1 media2 media2b dst1b env1c e5 e15 e3 e16 ///
d1 d2 d3 d4 d5 d6 vengrp5 vengrp6 vengrp7 vengrp8 vengrp9 vengrp10 vengrp11 vengrp12 vengrp13 vengrp14 venps1 venps2 venps3 venct1 venct2 venct3 ///
ls3 soct2 idio2 cp6 cp7 cp8 cp13 cp20 it1 aoj11 aoj12 polz1a m1 m2 drk1 env2b pn4 pn5 lib1 lib2b lib2c ///
lib4 fear11 pol1 vengrp1 vengrp2 vengrp3 vengrp4 mil10a1 mil10e1 mil10un1 mil10un2 mil10a2 mil10e2 venesc2b venesc3 ///
sd2new2 sd3new2 sd6new2 venprot10 venprot12 venprot11 mil10oas1 mil10oas2

* (1) difference of means: each item is rescaled to 0-100 using its observed
*     minimum and maximum, then compared across likelyfraud groups within the
*     matched sample with an unequal-variance t test
postfile means_out str32 question clean_mu clean_sd fraud_mu fraud_sd diff diff_se n1 n2 dof pval using "questions_mean.dta", replace
foreach item of varlist `items' {
	display "`item'"
	summarize `item'
	generate `item'_rs = (`item' - r(min))/(r(max) - r(min)) * 100
	ttest `item'_rs if cem_matched == 1, by(likelyfraud) unequal
	post means_out ("`item'") (r(mu_1)) (r(sd_1)) (r(mu_2)) (r(sd_2)) (r(mu_1)-r(mu_2)) (r(se)) (r(N_1)) (r(N_2)) (r(df_t)) (r(p))
}
postclose means_out

* (2) difference of standard deviations: variance-ratio test on the rescaled items
postfile sds_out str32 question sd_clean sd_fraud diff_sd_fstat diff_sd_pval using "questions_sd.dta", replace
foreach item of varlist `items' {
	display "`item'"
	sdtest `item'_rs if cem_matched == 1, by(likelyfraud)
	post sds_out ("`item'") (r(sd_1)) (r(sd_2)) (r(F)) (r(p))
}
postclose sds_out

* (3) scale of items: the largest observed value of each original item
postfile scale_out str32 question scale using "questions_sc.dta", replace
foreach item of varlist `items' {
	display "`item'"
	quietly summarize `item'
	post scale_out ("`item'") (r(max))
}
postclose scale_out

* Combine the three per-item result files into one dataset keyed by question
use questions_mean.dta, clear
foreach part in sd sc {
	merge 1:1 question using questions_`part'.dta, nogen
}
save questions_differences.dta, replace

* the intermediate files are no longer needed once merged
foreach part in sd mean sc {
	erase questions_`part'.dta
}

* Unadjusted significance indicators (1 = p-value below the cutoff, 0 otherwise)
* for the mean test (pval) and the variance test (diff_sd_pval)
generate mean_comp_test = pval < .05
generate var_comp_test = diff_sd_pval < .05
generate mean_comp_test90 = pval < .10
generate var_comp_test90 = diff_sd_pval < .10

save questions_differences.dta, replace

** Multiple-comparison adjustments (Bonferroni, Holm, Hochberg) for the random-response scenario
* Reads the per-item results saved in questions_differences.dta (one row per item),
* so _N is the number of items tested.
use questions_differences.dta, clear
* bonferroni: each p-value is compared with alpha divided by the number of items
gen mean_comp_test_bonf = 1 if pval < (.05/_N)
replace mean_comp_test_bonf = 0 if pval >= (.05/_N)
gen mean_comp_test_bonf90 = 1 if pval < (.1/_N)
replace mean_comp_test_bonf90 = 0 if pval >= (.1/_N)
gen var_comp_test_bonf = 1 if diff_sd_pval < (.05/_N)
replace var_comp_test_bonf = 0 if diff_sd_pval >= (.05/_N)
gen var_comp_test_bonf90 = 1 if diff_sd_pval < (.1/_N)
replace var_comp_test_bonf90 = 0 if diff_sd_pval >= (.1/_N)
* absolute difference in (rescaled) means expressed in units of the first group's standard deviation
gen magn_diff_sd = abs(diff) / clean_sd

* holm-type thresholds (alpha = .10): the k-th smallest p-value is compared with .10/(_N - k + 1)
* NOTE(review): every item is judged against its own threshold only; a textbook Holm
* procedure also stops rejecting after the first non-rejection - confirm this is intended.
gsort pval
gen mean_pval_rank = _n
gen mean_test_holm_alpha = .10 / (_N - mean_pval_rank + 1)
gen mean_comp_test_holm = 1 if pval < mean_test_holm_alpha
* >= (not >) so that a p-value exactly equal to the threshold is coded 0 instead of missing
replace mean_comp_test_holm = 0 if pval >= mean_test_holm_alpha

gsort diff_sd_pval
gen var_pval_rank = _n
gen var_test_holm_alpha = .10 / (_N - var_pval_rank + 1)
* FIX: the variance flag must be based on the variance-test p-value (diff_sd_pval);
* the previous code compared the mean-test p-value (pval) with the variance thresholds
gen var_comp_test_holm = 1 if diff_sd_pval < var_test_holm_alpha
replace var_comp_test_holm = 0 if diff_sd_pval >= var_test_holm_alpha

* hochberg-type thresholds (alpha = .10), ranks assigned from the largest p-value down
* NOTE(review): with this ranking the largest p-value is compared with .10/_N and the
* smallest with .10; the usual Hochberg step-up procedure compares the largest p-value
* with alpha itself - confirm the intended direction of the thresholds.
gsort -pval
gen mean_pval_rank_r = _n
gen mean_test_hoch_alpha = .10 / (_N - mean_pval_rank_r + 1)
gen mean_comp_test_hoch = 1 if pval < mean_test_hoch_alpha
replace mean_comp_test_hoch = 0 if pval >= mean_test_hoch_alpha

gsort -diff_sd_pval
gen var_pval_rank_r = _n
gen var_test_hoch_alpha = .10 / (_N - var_pval_rank_r + 1)
* FIX: use diff_sd_pval (variance test), not pval (mean test)
gen var_comp_test_hoch = 1 if diff_sd_pval < var_test_hoch_alpha
replace var_comp_test_hoch = 0 if diff_sd_pval >= var_test_hoch_alpha

*** TABLE 3 RESULTS ***
** Column 2 Row 1 (Difference in means) range of values
tab1 mean_comp_test_bonf90 mean_comp_test_holm mean_comp_test_hoch mean_comp_test90
** Column 2 Row 2 (Average magnitude) range of values
summ magn_diff_sd if mean_comp_test_bonf90 == 1
summ magn_diff_sd if mean_comp_test_holm == 1
summ magn_diff_sd if mean_comp_test_hoch == 1
summ magn_diff_sd if mean_comp_test90 == 1
** Column 2 Row 3 (Difference in variances) range of values
tab1 var_comp_test_bonf90 var_comp_test_holm var_comp_test_hoch var_comp_test90

** Speeding Responses

use "VEN_fraud_data.dta", clear
gen pais = 16

* Combine each pair of split variables: take the value from the member whose
* partner is coded 999999; if neither is coded 999999 the result is missing.
gen env2b = cond(env2b1 == 999999, env2b2, cond(env2b2 == 999999, env2b1, .))
gen drk1 = cond(drk11 == 999999, drk12, cond(drk12 == 999999, drk11, .))

* Remove the matched, likely fraudulent cases and append the speeding
* interviews from venezuela_speed.dta in their place
keep if !(likelyfraud == 1 & cem_matched == 1)
append using "venezuela_speed.dta", force

* survey items analysed in this scenario
local items l1 venl2 venl3 b1 b2 b3 b4 b6 b43 b11 b12 b13 b18 b21 b21a b32 b37 b47a venb11 venb10 venb51 venvb18 venvb19 venvb20 pr3dn pr3en polz1 ///
ros1 ros4 ing4 eff1 eff2 vengrp15 vengrp16 aoj22new media3 media4 media4b media1 media2 media2b dst1b env1c e5 e15 e3 e16 ///
d1 d2 d3 d4 d5 d6 vengrp5 vengrp6 vengrp7 vengrp8 vengrp9 vengrp10 vengrp11 vengrp12 vengrp13 vengrp14 venps1 venps2 venps3 venct1 venct2 venct3 ///
ls3 soct2 idio2 cp6 cp7 cp8 cp13 cp20 it1 aoj11 aoj12 polz1a m1 m2 drk1 env2b pn4 pn5 lib1 lib2b lib2c ///
lib4 fear11 pol1 vengrp1 vengrp2 vengrp3 vengrp4 mil10a1 mil10e1 mil10un1 mil10un2 mil10a2 mil10e2 venesc2b venesc3 ///
sd2new2 sd3new2 sd6new2 venprot10 venprot12 venprot11 mil10oas1 mil10oas2

* Item nonresponse: two-sample test of proportions by likelyfraud in the matched sample
postfile nr_out str32 question nonresponse_clean nonresponse_fraud nr_diff diff_nr_zstat using "questions_nr.dta", replace
foreach item of varlist `items' {
	display "`item'"
	* indicator is 1 for codes 888888, 988888 or 99, 0 for any other value,
	* and missing when the item is coded 999999
	generate `item'_nr = inlist(`item', 888888, 988888, 99) if `item' != 999999
	prtest `item'_nr if cem_matched == 1, by(likelyfraud)
	post nr_out ("`item'") (r(P_1)) (r(P_2)) (r(P_1)-r(P_2)) (r(z))
}
postclose nr_out

* With nonresponse recorded, set the special codes to missing for the remaining analysis
foreach item of varlist `items' {
	recode `item' (888888 988888 999999 99 = .)
}
*

* Item-by-item comparisons for the speeding scenario.
* Three result files are written, one row per survey item, keyed by the item name.
local items l1 venl2 venl3 b1 b2 b3 b4 b6 b43 b11 b12 b13 b18 b21 b21a b32 b37 b47a venb11 venb10 venb51 venvb18 venvb19 venvb20 pr3dn pr3en polz1 ///
ros1 ros4 ing4 eff1 eff2 vengrp15 vengrp16 aoj22new media3 media4 media4b media1 media2 media2b dst1b env1c e5 e15 e3 e16 ///
d1 d2 d3 d4 d5 d6 vengrp5 vengrp6 vengrp7 vengrp8 vengrp9 vengrp10 vengrp11 vengrp12 vengrp13 vengrp14 venps1 venps2 venps3 venct1 venct2 venct3 ///
ls3 soct2 idio2 cp6 cp7 cp8 cp13 cp20 it1 aoj11 aoj12 polz1a m1 m2 drk1 env2b pn4 pn5 lib1 lib2b lib2c ///
lib4 fear11 pol1 vengrp1 vengrp2 vengrp3 vengrp4 mil10a1 mil10e1 mil10un1 mil10un2 mil10a2 mil10e2 venesc2b venesc3 ///
sd2new2 sd3new2 sd6new2 venprot10 venprot12 venprot11 mil10oas1 mil10oas2

* (1) difference of means: each item is rescaled to 0-100 using its observed
*     minimum and maximum, then compared across likelyfraud groups within the
*     matched sample with an unequal-variance t test
postfile means_out str32 question clean_mu clean_sd fraud_mu fraud_sd diff diff_se n1 n2 dof pval using "questions_mean.dta", replace
foreach item of varlist `items' {
	display "`item'"
	summarize `item'
	generate `item'_rs = (`item' - r(min))/(r(max) - r(min)) * 100
	ttest `item'_rs if cem_matched == 1, by(likelyfraud) unequal
	post means_out ("`item'") (r(mu_1)) (r(sd_1)) (r(mu_2)) (r(sd_2)) (r(mu_1)-r(mu_2)) (r(se)) (r(N_1)) (r(N_2)) (r(df_t)) (r(p))
}
postclose means_out

* (2) difference of standard deviations: variance-ratio test on the rescaled items
postfile sds_out str32 question sd_clean sd_fraud diff_sd_fstat diff_sd_pval using "questions_sd.dta", replace
foreach item of varlist `items' {
	display "`item'"
	sdtest `item'_rs if cem_matched == 1, by(likelyfraud)
	post sds_out ("`item'") (r(sd_1)) (r(sd_2)) (r(F)) (r(p))
}
postclose sds_out

* (3) scale of items: the largest observed value of each original item
postfile scale_out str32 question scale using "questions_sc.dta", replace
foreach item of varlist `items' {
	display "`item'"
	quietly summarize `item'
	post scale_out ("`item'") (r(max))
}
postclose scale_out

* Combine the four per-item result files (means, standard deviations, scale,
* nonresponse) into one dataset keyed by question
use questions_mean.dta, clear
merge 1:1 question using questions_sd.dta, nogen
merge 1:1 question using questions_sc.dta, nogen
merge 1:1 question using questions_nr.dta, nogen
save questions_differences.dta, replace

* the intermediate files are no longer needed once merged
erase questions_sd.dta
erase questions_mean.dta
erase questions_sc.dta
erase questions_nr.dta

* Unadjusted significance indicators at the 5% and 10% levels for the
* mean test (pval) and the variance test (diff_sd_pval)
gen mean_comp_test = 1 if pval < .05
replace mean_comp_test = 0 if pval >= .05
gen var_comp_test = 1 if diff_sd_pval < .05
replace var_comp_test = 0 if diff_sd_pval >= .05
gen mean_comp_test90 = 1 if pval < .10
replace mean_comp_test90 = 0 if pval >= .10
gen var_comp_test90 = 1 if diff_sd_pval < .10
replace var_comp_test90 = 0 if diff_sd_pval >= .10
* Nonresponse indicators based on the absolute z statistic.
* FIX: Stata treats a missing value as larger than any number, so without the
* !missing() guard an item whose z statistic could not be computed was flagged
* as significant. Such items are now left missing.
gen nr_diff_test = 1 if abs(diff_nr_zstat) > 1.96 & !missing(diff_nr_zstat)
replace nr_diff_test = 0 if abs(diff_nr_zstat) <= 1.96
gen nr_diff_test90 = 1 if abs(diff_nr_zstat) > 1.64 & !missing(diff_nr_zstat)
replace nr_diff_test90 = 0 if abs(diff_nr_zstat) <= 1.64
save questions_differences.dta, replace

** Multiple-comparison adjustments (Bonferroni, Holm, Hochberg) for the speeding scenario
* Reads the per-item results saved in questions_differences.dta (one row per item),
* so _N is the number of items tested.
use questions_differences.dta, clear
* bonferroni: each p-value is compared with alpha divided by the number of items
gen mean_comp_test_bonf = 1 if pval < (.05/_N)
replace mean_comp_test_bonf = 0 if pval >= (.05/_N)
gen mean_comp_test_bonf90 = 1 if pval < (.1/_N)
replace mean_comp_test_bonf90 = 0 if pval >= (.1/_N)
gen var_comp_test_bonf = 1 if diff_sd_pval < (.05/_N)
replace var_comp_test_bonf = 0 if diff_sd_pval >= (.05/_N)
gen var_comp_test_bonf90 = 1 if diff_sd_pval < (.1/_N)
replace var_comp_test_bonf90 = 0 if diff_sd_pval >= (.1/_N)
* Nonresponse z statistics are compared with hard-coded critical values.
* NOTE(review): 3.31216 and 3.12637 are not derived from _N in this file; confirm
* which number of tests and which sidedness they correspond to.
* FIX: a missing z statistic compares as larger than any number in Stata, so the
* !missing() guard keeps untestable items from being flagged as significant.
gen nr_diff_test_bonf = 1 if abs(diff_nr_zstat) > 3.31216 & !missing(diff_nr_zstat)
replace nr_diff_test_bonf = 0 if abs(diff_nr_zstat) <= 3.31216
gen nr_diff_test_bonf90 = 1 if abs(diff_nr_zstat) > 3.12637 & !missing(diff_nr_zstat)
replace nr_diff_test_bonf90 = 0 if abs(diff_nr_zstat) <= 3.12637
* absolute difference in (rescaled) means expressed in units of the first group's standard deviation
gen magn_diff_sd = abs(diff) / clean_sd


* holm-type thresholds (alpha = .10): the k-th smallest p-value is compared with .10/(_N - k + 1)
* NOTE(review): every item is judged against its own threshold only; a textbook Holm
* procedure also stops rejecting after the first non-rejection - confirm this is intended.
gsort pval
gen mean_pval_rank = _n
gen mean_test_holm_alpha = .10 / (_N - mean_pval_rank + 1)
gen mean_comp_test_holm = 1 if pval < mean_test_holm_alpha
* >= (not >) so that a p-value exactly equal to the threshold is coded 0 instead of missing
replace mean_comp_test_holm = 0 if pval >= mean_test_holm_alpha

gsort diff_sd_pval
gen var_pval_rank = _n
gen var_test_holm_alpha = .10 / (_N - var_pval_rank + 1)
* FIX: the variance flag must be based on the variance-test p-value (diff_sd_pval);
* the previous code compared the mean-test p-value (pval) with the variance thresholds
gen var_comp_test_holm = 1 if diff_sd_pval < var_test_holm_alpha
replace var_comp_test_holm = 0 if diff_sd_pval >= var_test_holm_alpha

* nonresponse: items ranked from the largest absolute z statistic down; the threshold
* is the two-sided normal critical value for .10/(_N - rank + 1)
gen abs_diff_nr_zstat = abs(diff_nr_zstat) 
gsort -abs_diff_nr_zstat
gen nr_z_rank = _n
gen nr_test_holm_alpha = abs(invnormal((.10 / (_N - nr_z_rank + 1))/2))
* FIX: guard against missing z statistics (see the bonferroni note above)
gen nr_comp_test_holm = 1 if abs_diff_nr_zstat > nr_test_holm_alpha & !missing(abs_diff_nr_zstat)
replace nr_comp_test_holm = 0 if abs_diff_nr_zstat <= nr_test_holm_alpha

* hochberg-type thresholds (alpha = .10), ranks assigned from the largest p-value down
* NOTE(review): with this ranking the largest p-value is compared with .10/_N and the
* smallest with .10; the usual Hochberg step-up procedure compares the largest p-value
* with alpha itself - confirm the intended direction of the thresholds.
gsort -pval
gen mean_pval_rank_r = _n
gen mean_test_hoch_alpha = .10 / (_N - mean_pval_rank_r + 1)
gen mean_comp_test_hoch = 1 if pval < mean_test_hoch_alpha
replace mean_comp_test_hoch = 0 if pval >= mean_test_hoch_alpha

gsort -diff_sd_pval
gen var_pval_rank_r = _n
gen var_test_hoch_alpha = .10 / (_N - var_pval_rank_r + 1)
* FIX: use diff_sd_pval (variance test), not pval (mean test)
gen var_comp_test_hoch = 1 if diff_sd_pval < var_test_hoch_alpha
replace var_comp_test_hoch = 0 if diff_sd_pval >= var_test_hoch_alpha

* nonresponse: items ranked from the smallest absolute z statistic up
gsort abs_diff_nr_zstat
gen nr_z_rank_r = _n
gen nr_test_hoch_alpha = abs(invnormal((.10 / (_N - nr_z_rank_r + 1))/2))
* FIX: guard against missing z statistics (see the bonferroni note above)
gen nr_comp_test_hoch = 1 if abs_diff_nr_zstat > nr_test_hoch_alpha & !missing(abs_diff_nr_zstat)
replace nr_comp_test_hoch = 0 if abs_diff_nr_zstat <= nr_test_hoch_alpha

*** TABLE 3 RESULTS ***
** Column 3 Row 1 (Difference in means) range of values
tab1 mean_comp_test_bonf90 mean_comp_test_holm mean_comp_test_hoch mean_comp_test90
** Column 3 Row 2 (Average magnitude) range of values
summ magn_diff_sd if mean_comp_test_bonf90 == 1
summ magn_diff_sd if mean_comp_test_holm == 1
summ magn_diff_sd if mean_comp_test_hoch == 1
summ magn_diff_sd if mean_comp_test90 == 1
** Column 3 Row 3 (Difference in variances) range of values
tab1 var_comp_test_bonf90 var_comp_test_holm var_comp_test_hoch var_comp_test90
** Column 3 Row 4 (Item nonresponse) range of values
tab1 nr_diff_test_bonf90 nr_comp_test_holm nr_comp_test_hoch nr_diff_test90

** Middling Responses
* Reloads the survey data and overwrites the matched, likely fraudulent cases with
* simulated answers clustered around the middle of each item's response scale.

use "VEN_fraud_data.dta", clear

* Combine each pair of split variables: take the value from the member whose
* partner is coded 999999 (rows where neither is coded 999999 stay missing)
gen env2b = env2b1 if env2b2 == 999999
replace env2b = env2b2 if env2b1 == 999999
gen drk1 = drk11 if drk12 == 999999
replace drk1 = drk12 if drk11 == 999999

* fixed seed so the simulated answers are reproducible; the draws below depend on
* the exact order of the statements, so do not reorder them
set seed 2414830

* replace likely fraudulent cases with centered data
foreach x of varlist l1 venl2 venl3 b1 b2 b3 b4 b6 b43 b11 b12 b13 b18 b21 b21a b32 b37 b47a venb11 venb10 venb51 venvb18 venvb19 venvb20 pr3dn pr3en polz1 ///
ros1 ros4 ing4 eff1 eff2 vengrp15 vengrp16 aoj22new media3 media4 media4b media1 media2 media2b dst1b env1c e5 e15 e3 e16 ///
d1 d2 d3 d4 d5 d6 vengrp5 vengrp6 vengrp7 vengrp8 vengrp9 vengrp10 vengrp11 vengrp12 vengrp13 vengrp14 venps1 venps2 venps3 venct1 venct2 venct3 ///
ls3 soct2 idio2 cp6 cp7 cp8 cp13 cp20 it1 aoj11 aoj12 polz1a m1 m2 drk1 env2b pn4 pn5 lib1 lib2b lib2c ///
lib4 fear11 pol1 vengrp1 vengrp2 vengrp3 vengrp4 mil10a1 mil10e1 mil10un1 mil10un2 mil10a2 mil10e2 venesc2b venesc3 ///
sd2new2 sd3new2 sd6new2 venprot10 venprot12 venprot11 mil10oas1 mil10oas2 {
	* codes 888888, 988888, 999999 and 99 become missing before the maximum is taken
	recode `x' (888888 988888 999999 99 = .)
	* r(max) from this summarize is reused by every replace below to pick the scale
	qui summ `x'
	* For each recognised maximum (3, 4, 5, 7, 10) the matched likely-fraud cases get a
	* rounded normal draw with mean 2, 2.5, 3, 4 or 5.5 and standard deviation max/10,
	* followed by a cap at the scale maximum.
	* NOTE(review): items whose observed maximum is none of 3, 4, 5, 7, 10 are not
	* altered by these lines - confirm every item in the list has one of these maxima.
	qui replace `x' = round(rnormal(2, 3/10)) if likelyfraud == 1 & cem_matched == 1 & r(max) == 3
	qui replace `x' = 3 if likelyfraud == 1 & cem_matched == 1 & r(max) == 3 & `x' > 3
	qui replace `x' = round(rnormal(2.5, 4/10)) if likelyfraud == 1 & cem_matched == 1 & r(max) == 4
	qui replace `x' = 4 if likelyfraud == 1 & cem_matched == 1 & r(max) == 4 & `x' > 4
	qui replace `x' = round(rnormal(3, 5/10)) if likelyfraud == 1 & cem_matched == 1 & r(max) == 5
	qui replace `x' = 5 if likelyfraud == 1 & cem_matched == 1 & r(max) == 5 & `x' > 5
	qui replace `x' = round(rnormal(4, 7/10)) if likelyfraud == 1 & cem_matched == 1 & r(max) == 7
	qui replace `x' = 7 if likelyfraud == 1 & cem_matched == 1 & r(max) == 7 & `x' > 7
	qui replace `x' = round(rnormal(5.5, 10/10)) if likelyfraud == 1 & cem_matched == 1 & r(max) == 10
	qui replace `x' = 10 if likelyfraud == 1 & cem_matched == 1 & r(max) == 10 & `x' > 10
	* floor at 1
	* NOTE(review): this line has no likelyfraud or cem_matched condition, so it applies
	* to every observation with a value below 1 - confirm that is intended.
	qui replace `x' = 1 if `x' < 1
}
*

* Item-by-item comparisons for the middling-response scenario.
* Three result files are written, one row per survey item, keyed by the item name.
local items l1 venl2 venl3 b1 b2 b3 b4 b6 b43 b11 b12 b13 b18 b21 b21a b32 b37 b47a venb11 venb10 venb51 venvb18 venvb19 venvb20 pr3dn pr3en polz1 ///
ros1 ros4 ing4 eff1 eff2 vengrp15 vengrp16 aoj22new media3 media4 media4b media1 media2 media2b dst1b env1c e5 e15 e3 e16 ///
d1 d2 d3 d4 d5 d6 vengrp5 vengrp6 vengrp7 vengrp8 vengrp9 vengrp10 vengrp11 vengrp12 vengrp13 vengrp14 venps1 venps2 venps3 venct1 venct2 venct3 ///
ls3 soct2 idio2 cp6 cp7 cp8 cp13 cp20 it1 aoj11 aoj12 polz1a m1 m2 drk1 env2b pn4 pn5 lib1 lib2b lib2c ///
lib4 fear11 pol1 vengrp1 vengrp2 vengrp3 vengrp4 mil10a1 mil10e1 mil10un1 mil10un2 mil10a2 mil10e2 venesc2b venesc3 ///
sd2new2 sd3new2 sd6new2 venprot10 venprot12 venprot11 mil10oas1 mil10oas2

* (1) difference of means: each item is rescaled to 0-100 using its observed
*     minimum and maximum, then compared across likelyfraud groups within the
*     matched sample with an unequal-variance t test
postfile means_out str32 question clean_mu clean_sd fraud_mu fraud_sd diff diff_se n1 n2 dof pval using "questions_mean.dta", replace
foreach item of varlist `items' {
	display "`item'"
	summarize `item'
	generate `item'_rs = (`item' - r(min))/(r(max) - r(min)) * 100
	ttest `item'_rs if cem_matched == 1, by(likelyfraud) unequal
	post means_out ("`item'") (r(mu_1)) (r(sd_1)) (r(mu_2)) (r(sd_2)) (r(mu_1)-r(mu_2)) (r(se)) (r(N_1)) (r(N_2)) (r(df_t)) (r(p))
}
postclose means_out

* (2) difference of standard deviations: variance-ratio test on the rescaled items
postfile sds_out str32 question sd_clean sd_fraud diff_sd_fstat diff_sd_pval using "questions_sd.dta", replace
foreach item of varlist `items' {
	display "`item'"
	sdtest `item'_rs if cem_matched == 1, by(likelyfraud)
	post sds_out ("`item'") (r(sd_1)) (r(sd_2)) (r(F)) (r(p))
}
postclose sds_out

* (3) scale of items: the largest observed value of each original item
postfile scale_out str32 question scale using "questions_sc.dta", replace
foreach item of varlist `items' {
	display "`item'"
	quietly summarize `item'
	post scale_out ("`item'") (r(max))
}
postclose scale_out

* Combine the three per-item result files into one dataset keyed by question
use questions_mean.dta, clear
foreach part in sd sc {
	merge 1:1 question using questions_`part'.dta, nogen
}
save questions_differences.dta, replace

* the intermediate files are no longer needed once merged
foreach part in sd mean sc {
	erase questions_`part'.dta
}

* Unadjusted significance indicators (1 = p-value below the cutoff, 0 otherwise)
* for the mean test (pval) and the variance test (diff_sd_pval)
generate mean_comp_test = pval < .05
generate var_comp_test = diff_sd_pval < .05
generate mean_comp_test90 = pval < .10
generate var_comp_test90 = diff_sd_pval < .10
save questions_differences.dta, replace

** Multiple-comparison adjustments (Bonferroni, Holm, Hochberg) for the middling-response scenario
* Reads the per-item results saved in questions_differences.dta (one row per item),
* so _N is the number of items tested.
use questions_differences.dta, clear
* bonferroni: each p-value is compared with alpha divided by the number of items
gen mean_comp_test_bonf = 1 if pval < (.05/_N)
replace mean_comp_test_bonf = 0 if pval >= (.05/_N)
gen mean_comp_test_bonf90 = 1 if pval < (.1/_N)
replace mean_comp_test_bonf90 = 0 if pval >= (.1/_N)
gen var_comp_test_bonf = 1 if diff_sd_pval < (.05/_N)
replace var_comp_test_bonf = 0 if diff_sd_pval >= (.05/_N)
gen var_comp_test_bonf90 = 1 if diff_sd_pval < (.1/_N)
replace var_comp_test_bonf90 = 0 if diff_sd_pval >= (.1/_N)
* absolute difference in (rescaled) means expressed in units of the first group's standard deviation
gen magn_diff_sd = abs(diff) / clean_sd

* holm-type thresholds (alpha = .10): the k-th smallest p-value is compared with .10/(_N - k + 1)
* NOTE(review): every item is judged against its own threshold only; a textbook Holm
* procedure also stops rejecting after the first non-rejection - confirm this is intended.
gsort pval
gen mean_pval_rank = _n
gen mean_test_holm_alpha = .10 / (_N - mean_pval_rank + 1)
gen mean_comp_test_holm = 1 if pval < mean_test_holm_alpha
* >= (not >) so that a p-value exactly equal to the threshold is coded 0 instead of missing
replace mean_comp_test_holm = 0 if pval >= mean_test_holm_alpha

gsort diff_sd_pval
gen var_pval_rank = _n
gen var_test_holm_alpha = .10 / (_N - var_pval_rank + 1)
* FIX: the variance flag must be based on the variance-test p-value (diff_sd_pval);
* the previous code compared the mean-test p-value (pval) with the variance thresholds
gen var_comp_test_holm = 1 if diff_sd_pval < var_test_holm_alpha
replace var_comp_test_holm = 0 if diff_sd_pval >= var_test_holm_alpha

* hochberg-type thresholds (alpha = .10), ranks assigned from the largest p-value down
* NOTE(review): with this ranking the largest p-value is compared with .10/_N and the
* smallest with .10; the usual Hochberg step-up procedure compares the largest p-value
* with alpha itself - confirm the intended direction of the thresholds.
gsort -pval
gen mean_pval_rank_r = _n
gen mean_test_hoch_alpha = .10 / (_N - mean_pval_rank_r + 1)
gen mean_comp_test_hoch = 1 if pval < mean_test_hoch_alpha
replace mean_comp_test_hoch = 0 if pval >= mean_test_hoch_alpha

gsort -diff_sd_pval
gen var_pval_rank_r = _n
gen var_test_hoch_alpha = .10 / (_N - var_pval_rank_r + 1)
* FIX: use diff_sd_pval (variance test), not pval (mean test)
gen var_comp_test_hoch = 1 if diff_sd_pval < var_test_hoch_alpha
replace var_comp_test_hoch = 0 if diff_sd_pval >= var_test_hoch_alpha

*** TABLE 3 RESULTS ***
** Column 4 Row 1 (Difference in means) range of values
tab1 mean_comp_test_bonf90 mean_comp_test_holm mean_comp_test_hoch mean_comp_test90
** Column 4 Row 2 (Average magnitude) range of values
summ magn_diff_sd if mean_comp_test_bonf90 == 1
summ magn_diff_sd if mean_comp_test_holm == 1
summ magn_diff_sd if mean_comp_test_hoch == 1
summ magn_diff_sd if mean_comp_test90 == 1
** Column 4 Row 3 (Difference in variances) range of values
tab1 var_comp_test_bonf90 var_comp_test_holm var_comp_test_hoch var_comp_test90
